{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load your dataset to Argilla"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install argilla datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import argilla as rg\n",
    "\n",
    "HF_TOKEN = \"...\"  # only for private spaces\n",
    "\n",
    "client = rg.Argilla(\n",
    "    api_url=\"...\",\n",
    "    api_key=\"...\",\n",
    "    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"},  # only for private spaces\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': Value(dtype='string', id=None),\n",
       " 'label': Value(dtype='int64', id=None),\n",
       " 'label_text': Value(dtype='string', id=None)}"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "data = load_dataset(\"SetFit/ag_news\", split=\"train\")\n",
    "data.features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "settings = rg.Settings(\n",
    "    fields=[rg.TextField(name=\"text\")],\n",
    "    questions=[\n",
    "        rg.LabelQuestion(\n",
    "            name=\"label\", title=\"Classify the text:\", labels=data.unique(\"label_text\")\n",
    "        ),\n",
    "        rg.SpanQuestion(\n",
    "            name=\"entities\",\n",
    "            title=\"Highlight all the entities in the text:\",\n",
    "            labels=[\"PERSON\", \"ORG\", \"LOC\", \"EVENT\"],\n",
    "            field=\"text\",\n",
    "        ),\n",
    "    ],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = rg.Dataset(name=\"ag_news\", settings=settings)\n",
    "\n",
    "dataset.create()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset.records.log(data, mapping={\"label_text\": \"label\"})"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Load your dataset to Argilla",
   "provenance": []
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
